title: "gender pay gap uk"
author: "Tomasz Olczyk"
date: "3/30/2024"
output: html_document
params:
  category:
    value: all
    choices: [all, admin & organisation,
      care & education, creative & media, law & justice,
      manual work, sales & serving others, science, tech & engineering,
      senior managers & execs]
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the UK pay-gap table (one row per occupation) straight from GitHub.
pay_gap <- read_csv(
  file = "https://raw.githubusercontent.com/Tomasz-Olczyk/wizualizacjaR/main/case%20studies%20/pay_gap_uk.csv"
)
## Rows: 81 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): occupation, category, pay_gap_as_a_percentage
## dbl (3): women_average_annual_salary, men_average_annual_salary, pay_gap
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows of the raw table.
head(pay_gap, n = 6)
## # A tibble: 6 × 6
##   occupation      category women_average_annual…¹ men_average_annual_s…² pay_gap
##   <chr>           <chr>                     <dbl>                  <dbl>   <dbl>
## 1 Admin & organi… admin &…                  20272                  24691    4419
## 2 Receptionists   admin &…                  12009                  13281    1272
## 3 Secretaries     admin &…                  14614                  15315     701
## 4 Admin           admin &…                  14594                  18729    4135
## 5 Stock control   admin &…                  17271                  20538    3267
## 6 Government adm… admin &…                  19287                  23047    3760
## # ℹ abbreviated names: ¹​women_average_annual_salary, ²​men_average_annual_salary
## # ℹ 1 more variable: pay_gap_as_a_percentage <chr>
# Wide table: one row per occupation with both salaries side by side.
# Used for the segments that connect the women's and men's points.
dane_gap <- pay_gap %>%
  select(
    occupation,
    women = women_average_annual_salary,
    men = men_average_annual_salary,
    pay_gap,
    category
  ) %>%
  mutate(category = factor(category))

# Long table: one row per occupation and gender, used for the points.
# Derived from dane_gap so the column selection and renaming live in one place.
dane_long <- dane_gap %>%
  pivot_longer(
    cols = c(women, men),
    names_to = "gender",
    values_to = "salary"
  )
# Preview the first rows of the long-format table.
head(dane_long, n = 6)
## # A tibble: 6 × 5
##   occupation           pay_gap category             gender salary
##   <chr>                  <dbl> <fct>                <chr>   <dbl>
## 1 Admin & organisation    4419 admin & organisation women   20272
## 2 Admin & organisation    4419 admin & organisation men     24691
## 3 Receptionists           1272 admin & organisation women   12009
## 4 Receptionists           1272 admin & organisation men     13281
## 5 Secretaries              701 admin & organisation women   14614
## 6 Secretaries              701 admin & organisation men     15315

# Two-colour palette for the gender points, given as 0-255 RGB components.
# (The stray `kolory <-` line was removed: it continued onto the next
# statement and silently created an unused copy of kolor1.)
kolor1 <- rgb(80, 27, 60, maxColorValue = 255)
kolor2 <- rgb(186, 195, 106, maxColorValue = 255)

# Palette vector passed to scale_colour_manual().
skala <- c(kolor1, kolor2)

wykres według pensji

# Dumbbell chart: women's vs men's salary per occupation, ordered by salary.
x <- dane_long %>%
  # The default parameter value "all" is not a real category, so a plain
  # equality filter would drop every row; keep everything in that case.
  filter(params$category == "all" | category == params$category) %>%
  # Fix the ordering in the data so every layer shares the same factor levels.
  mutate(occupation = reorder(occupation, salary)) %>%
  ggplot(aes(y = occupation)) +
  geom_segment(
    # Build the segment endpoints from the already filtered and ordered plot
    # data, so segments and points always cover the same occupations.
    data = ~ pivot_wider(.x, names_from = gender, values_from = salary),
    aes(yend = occupation, x = women, xend = men)
  ) +
  geom_point(aes(x = salary, colour = gender)) +
  # `guide` must be named; passed positionally it is not used as the guide.
  scale_colour_manual(values = skala, guide = guide_legend(position = "top")) +
  theme_void()
x

wykres według pay gap

# Dumbbell chart: women's vs men's salary per occupation, ordered by pay gap.
y <- dane_long %>%
  # The default parameter value "all" is not a real category, so a plain
  # equality filter would drop every row; keep everything in that case.
  filter(params$category == "all" | category == params$category) %>%
  # Fix the ordering in the data so every layer shares the same factor levels.
  mutate(occupation = reorder(occupation, pay_gap)) %>%
  ggplot(aes(y = occupation)) +
  geom_segment(
    # Build the segment endpoints from the filtered plot data; the unfiltered
    # wide table would draw segments for occupations outside the selection.
    data = ~ pivot_wider(.x, names_from = gender, values_from = salary),
    aes(yend = occupation, x = women, xend = men)
  ) +
  geom_point(aes(x = salary, colour = gender))
y

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Convert the pay-gap-ordered ggplot into an interactive plotly widget.
ggplotly(p = y)